######################### DATA PROCESSING ############################
######################################################################
#Name of code file: dynamic_data_processing.R
#Purpose: prepare dynamic network diversity and tolerance data for analysis 
#Data In: dynamic_network_tolerance_data.csv
#Data Out: dynamic_network_tolerance_processed_data.csv
######################################################################

#Load Packages
library(readr)

#Set Working Directory
# All paths below are relative to the replication folder. Switch to the
# author's Dropbox copy only when it exists on this machine; otherwise keep the
# current working directory (run the script from the project root so that
# "data/..." resolves) instead of aborting on a missing directory.
# The existence check omits the trailing slash because some platforms report a
# directory path with a trailing separator as missing.
if (dir.exists("~/Dropbox/egypt_tolerance_wp_replication")) {
  setwd("~/Dropbox/egypt_tolerance_wp_replication/")
}

#Read in Data
# `data` holds the input table; every later section adds derived columns to it.
data <- read_csv("data/dynamic_network_tolerance_data.csv")


#Time on Twitter
# date_start: calendar date parsed from the created_at timestamp string.
# NOTE(review): %a and %b are matched against the current locale's day/month
# names, so this presumably expects an English locale -- confirm before running
# elsewhere (unparseable stamps become NA).
data$date_start <- as.Date(
  as.POSIXct(data$created_at, format = "%a %b %d %H:%M:%S %z %Y")
)
# date_now: fixed reference date, recycled across all rows.
data$date_now <- as.Date("2016/11/01")
# twitter_time: plain numeric number of days between the two dates.
data$twitter_time <- as.numeric(
  difftime(data$date_now, data$date_start, units = "days")
)


#Secular/Moderate/Islamist
# Three 0/1 indicator columns:
#   islamist = 1 when may15islamist_elite_friends / elite_total exceeds 0.6
#   secular  = 1 when may15secular_elite_friends  / elite_total exceeds 0.6
#   moderate = 1 when neither of the above is 1
# Each column is built in one step rather than by subset-assigning into a
# column that does not exist yet, which makes tibbles warn about an
# "unknown or uninitialised column". `%in% TRUE` maps an undefined comparison
# (NA/NaN ratio, e.g. elite_total of 0) to FALSE, so such rows are coded 0.
data$islamist <- as.numeric(
  (data$may15islamist_elite_friends / data$elite_total > 0.6) %in% TRUE
)

data$secular <- as.numeric(
  (data$may15secular_elite_friends / data$elite_total > 0.6) %in% TRUE
)

# islamist and secular contain no NAs at this point, so neither does moderate.
data$moderate <- as.numeric(data$islamist == 0 & data$secular == 0)


#Tolerance Proportions for each period
# NOTE(review): prop_tolerant_may15 and prop_tolerant_may16 divide by the
# relevant_* count, while prop_tolerant_may15_6m divides by
# tolerant + intolerant. tol_change_6m therefore differences two shares with
# different denominators unless relevant == tolerant + intolerant in the data.
# TODO confirm this is intended.
# Zero denominators yield NaN; nothing here replaces them.
data$prop_tolerant_may15 <- with(data, tolerant_may15 / relevant_may15)
data$prop_intolerant_may15 <- with(
  data, intolerant_may15 / (tolerant_may15 + intolerant_may15)
)

data$prop_tolerant_may15_6m <- with(
  data, tolerant_may15_6m / (tolerant_may15_6m + intolerant_may15_6m)
)
data$prop_intolerant_may15_6m <- with(
  data, intolerant_may15_6m / (tolerant_may15_6m + intolerant_may15_6m)
)

data$prop_tolerant_may16 <- with(data, tolerant_may16 / relevant_may16)
data$prop_intolerant_may16 <- with(
  data, intolerant_may16 / (tolerant_may16 + intolerant_may16)
)

#Make 6 month variable
# May 2016 share minus the May 2015 six-month share.
data$tol_change_6m <- with(data, prop_tolerant_may16 - prop_tolerant_may15_6m)

data$intol_change_6m <- with(
  data, prop_intolerant_may16 - prop_intolerant_may15_6m
)

#Make year variable
# May 2016 share minus the May 2015 share.
data$tol_change <- with(data, prop_tolerant_may16 - prop_tolerant_may15)

data$intol_change <- with(data, prop_intolerant_may16 - prop_intolerant_may15)

#Tolerance Counts for Each Period
# Raw count differences: <count>_may16 minus <count>_may15<window>.
# Names of count_vars are the output prefixes, values are the input prefixes.
# The "_6m" window is processed first and the plain year window second, so the
# new columns are appended in this order:
#   tol_count_change_6m, intol_count_change_6m, rel_count_change_6m,
#   total_change_6m, tol_count_change, intol_count_change, rel_count_change,
#   total_change
count_vars <- c(
  tol_count = "tolerant",
  intol_count = "intolerant",
  rel_count = "relevant",
  total = "total"
)

for (window in c("_6m", "")) {
  for (out in names(count_vars)) {
    src <- count_vars[[out]]
    data[[paste0(out, "_change", window)]] <-
      data[[paste0(src, "_may16")]] - data[[paste0(src, "_may15", window)]]
  }
}

#Make Logged Vars
# Natural logs of several columns; most add 1 before logging so that a value
# of 0 maps to 0.
# NOTE(review): log_status_count has no + 1 offset, so a statuses_count of 0
# would give -Inf -- TODO confirm no such rows exist / this is intended.
# log_relevant is based on relevant_may15_6m (the six-month count).
data[["log_elite"]] <- log(data[["elite_total"]] + 1)
data[["log_total_friends"]] <- log(data[["may15total_num_friends"]] + 1)
data[["log_status_count"]] <- log(data[["statuses_count"]])
data[["log_time"]] <- log(data[["twitter_time"]] + 1)
data[["log_relevant"]] <- log(data[["relevant_may15_6m"]] + 1)

#Add Network Change Vars
# Each variable is the oct16 count minus the matching may15 count.

# Islamist / secular elite friends
data$total_isl_elite_change <- with(
  data, oct16islamist_elite_friends - may15islamist_elite_friends
)

data$total_sec_elite_change <- with(
  data, oct16secular_elite_friends - may15secular_elite_friends
)

# Islamist / secular non-elite friends (the *_60pct_* columns)
data$total_isl_nonelite_change <- with(
  data,
  oct16nonelite_friends_60pct_islamist - may15nonelite_friends_60pct_islamist
)

data$total_sec_nonelite_change <- with(
  data,
  oct16nonelite_friends_60pct_secular - may15nonelite_friends_60pct_secular
)

# All friends
data$total_friends_change <- with(
  data, oct16total_num_friends - may15total_num_friends
)

#May 2015 Elite Network Diversity Variables
# Diversity index = 1 - |islamist share - secular share|, where the shares are
# taken over islamist + secular elite friends.
data$elite_total_may15 <- with(
  data, may15islamist_elite_friends + may15secular_elite_friends
)
data$prop_sec_elite_may15 <- with(
  data, may15secular_elite_friends / elite_total_may15
)
data$prop_islamist_elite_may15 <- with(
  data, may15islamist_elite_friends / elite_total_may15
)
data$elite_diversity_may15 <- with(
  data, 1 - abs(prop_islamist_elite_may15 - prop_sec_elite_may15)
)
#Deal with NAs
# Undefined scores (NA or NaN, e.g. from a zero denominator) are set to 1.
data$elite_diversity_may15[is.na(data$elite_diversity_may15)] <- 1

#May 2015 Nonelite Network Diversity Variables
# Same index, computed from the *_60pct_* non-elite friend counts.
data$nonelite_total_may15 <- with(
  data,
  may15nonelite_friends_60pct_islamist + may15nonelite_friends_60pct_secular
)
data$prop_sec_nonelite_may15 <- with(
  data, may15nonelite_friends_60pct_secular / nonelite_total_may15
)
data$prop_islamist_nonelite_may15 <- with(
  data, may15nonelite_friends_60pct_islamist / nonelite_total_may15
)
data$nonelite_diversity_may15 <- with(
  data, 1 - abs(prop_islamist_nonelite_may15 - prop_sec_nonelite_may15)
)
#Deal with NAs
data$nonelite_diversity_may15[is.na(data$nonelite_diversity_may15)] <- 1

#May 2015 Nonelite Network Diversity Variables (With Moderates)
# moderate_non_elite_may15: share of all friends that are in neither *_60pct_*
# non-elite group.
data$moderate_non_elite_may15 <- with(
  data,
  (may15total_num_friends - may15nonelite_friends_60pct_islamist -
     may15nonelite_friends_60pct_secular) / may15total_num_friends
)
# Average of the two-group diversity index and the moderate share.
# NOTE(review): this uses the raw proportions, so unlike
# nonelite_diversity_may15 no NA replacement is applied here -- TODO confirm
# intended.
data$nonelite_diversity_mod_may15 <- with(
  data,
  (1 - abs(prop_sec_nonelite_may15 - prop_islamist_nonelite_may15) +
     moderate_non_elite_may15) / 2
)

#Oct 2016 Elite Network Diversity Variables
# Diversity index = 1 - |islamist share - secular share|, where the shares are
# taken over islamist + secular elite friends.
data$elite_total_oct16 <- with(
  data, oct16islamist_elite_friends + oct16secular_elite_friends
)
data$prop_sec_elite_oct16 <- with(
  data, oct16secular_elite_friends / elite_total_oct16
)
data$prop_islamist_elite_oct16 <- with(
  data, oct16islamist_elite_friends / elite_total_oct16
)
data$elite_diversity_oct16 <- with(
  data, 1 - abs(prop_islamist_elite_oct16 - prop_sec_elite_oct16)
)
#Deal with NAs
# Undefined scores (NA or NaN, e.g. from a zero denominator) are set to 1.
data$elite_diversity_oct16[is.na(data$elite_diversity_oct16)] <- 1

#Oct 2016 Nonelite Network Diversity Variables
# Same index, computed from the *_60pct_* non-elite friend counts.
data$nonelite_total_oct16 <- with(
  data,
  oct16nonelite_friends_60pct_islamist + oct16nonelite_friends_60pct_secular
)
data$prop_sec_nonelite_oct16 <- with(
  data, oct16nonelite_friends_60pct_secular / nonelite_total_oct16
)
data$prop_islamist_nonelite_oct16 <- with(
  data, oct16nonelite_friends_60pct_islamist / nonelite_total_oct16
)
data$nonelite_diversity_oct16 <- with(
  data, 1 - abs(prop_islamist_nonelite_oct16 - prop_sec_nonelite_oct16)
)
#Deal with NAs
data$nonelite_diversity_oct16[is.na(data$nonelite_diversity_oct16)] <- 1

#Oct 2016 Nonelite Network Diversity Variables (With Moderates)
# moderate_non_elite_oct16: share of all friends that are in neither *_60pct_*
# non-elite group.
data$moderate_non_elite_oct16 <- with(
  data,
  (oct16total_num_friends - oct16nonelite_friends_60pct_islamist -
     oct16nonelite_friends_60pct_secular) / oct16total_num_friends
)
# Average of the two-group diversity index and the moderate share.
# NOTE(review): this uses the raw proportions, so unlike
# nonelite_diversity_oct16 no NA replacement is applied here -- TODO confirm
# intended.
data$nonelite_diversity_mod_oct16 <- with(
  data,
  (1 - abs(prop_sec_nonelite_oct16 - prop_islamist_nonelite_oct16) +
     moderate_non_elite_oct16) / 2
)

#Change in Network Diversity (whole period)
# Oct 2016 value minus May 2015 value for each diversity index.
data[["elite_diversity_total_change"]] <-
  data[["elite_diversity_oct16"]] - data[["elite_diversity_may15"]]
data[["nonelite_diversity_total_change"]] <-
  data[["nonelite_diversity_oct16"]] - data[["nonelite_diversity_may15"]]
data[["nonelite_diversity_total_change_moderate"]] <-
  data[["nonelite_diversity_mod_oct16"]] -
  data[["nonelite_diversity_mod_may15"]]

#Change in Total Elites
# Difference in the islamist + secular elite friend totals.
data[["total_elite_friend_change"]] <-
  data[["elite_total_oct16"]] - data[["elite_total_may15"]]

#Change in Total Non-Elites
# Difference in the islamist + secular non-elite friend totals.
data[["total_nonelite_friend_change"]] <-
  data[["nonelite_total_oct16"]] - data[["nonelite_total_may15"]]

#Write CSV
# Save the input columns plus every derived column; the path is relative to the
# working directory.
readr::write_csv(
  data,
  "data/dynamic_network_tolerance_processed_data.csv"
)




